## A Research Note on the Prevalence
## of Housing Eviction among Children
## Born in American Cities

## CODE PART E:
## Comparison to vital records data

## Ian Lundberg and Louis Donnelly

## Department of Sociology, Office of Population Research,
## and Center for Research on Child Wellbeing
## Princeton University

## Code by Ian Lundberg (ilundberg@princeton.edu)
## Last updated 8 May 2018

## Define the location where we will save figures
setwd("C:/Users/iandl/Documents/FF Eviction Prevalence")

# Data files are available at http://www.nber.org/data/vital-statistics-natality-data.html

library(tidyverse)
library(haven)
library(reshape2)
library(foreach)
library(doParallel)
library(doRNG)

# Load our imputed data for comparison
load("Intermediate/amelia.out.Rdata")
our_data <- foreach(i = 1:length(amelia.out), .combine = "rbind") %do% {
  amelia.out$imputations[[i]] %>%
    select(idnum, m1natwt, married, cm1age, cm1ethrace, cm1edu)
} %>%
  mutate(m1natwt = ifelse(is.na(m1natwt), 0, m1natwt)) %>%
  summarize(Married.unweighted = mean(married),
            Married.weighted = weighted.mean(married, w = m1natwt),
            Age.unweighted = mean(cm1age),
            Age.weighted = weighted.mean(cm1age, w = m1natwt),
            Black.unweighted = mean(cm1ethrace == "Black"),
            Black.weighted = weighted.mean(cm1ethrace == "Black", w = m1natwt),
            Hispanic.unweighted = mean(cm1ethrace == "Hispanic"),
            Hispanic.weighted = weighted.mean(cm1ethrace == "Hispanic", w = m1natwt),
            Other.unweighted = mean(cm1ethrace == "Other"),
            Other.weighted = weighted.mean(cm1ethrace == "Other", w = m1natwt),
            LessThanHS.unweighted = mean(cm1edu == "Less than HS"),
            LessThanHS.weighted = weighted.mean(cm1edu == "Less than HS", w = m1natwt),
            HighSchool.unweighted = mean(cm1edu == "HS"),
            HighSchool.weighted = weighted.mean(cm1edu == "HS", w = m1natwt),
            SomeCollege.unweighted = mean(cm1edu == "Some college"),
            SomeCollege.weighted = weighted.mean(cm1edu == "Some college", w = m1natwt),
            College.unweighted = mean(cm1edu == "College"),
            College.weighted = weighted.mean(cm1edu == "College", w = m1natwt)) %>%
  melt(id = NULL) %>%
  separate(variable, into = c("variable","estimate")) %>%
  mutate(variable = factor(1:n(), labels = variable)) %>%
  spread(key = "estimate", value = "value")


# Read the data files
d1 <- read_dta("Data/natl1998.dta") %>%
  select(citrspop, dmar, meduc6, orracem, dmage)
d2 <- read_dta("Data/natl1999.dta") %>%
  select(citrspop, dmar, meduc6, orracem, dmage)
d3 <- read_dta("Data/natl2000.dta") %>%
  select(citrspop, dmar, meduc6, orracem, dmage)

## Summarize for all births

vital_records_all <- d1 %>%
  bind_rows(d2) %>%
  bind_rows(d3) %>%
  mutate(married = case_when(dmar == 1 ~ 1,
                             dmar == 2 ~ 0),
         cm1edu = factor(case_when(meduc6 %in% (1:2) ~ 1,
                                   meduc6 == 3 ~ 2,
                                   meduc6 == 4 ~ 3,
                                   meduc6 == 5 ~ 4),
                         labels = c("Less than HS",
                                    "High school",
                                    "Some college",
                                    "College")),
         cm1ethrace = case_when(orracem == 7 ~ "Black",
                                orracem <= 5 ~ "Hispanic",
                                orracem != 9 ~ "Other"),
         cm1age = dmage) %>%
  summarize(Married = mean(married, na.rm = T),
            missing.Married = mean(is.na(married)),
            Age = mean(cm1age, na.rm = T),
            missing.Age = mean(is.na(cm1age)),
            Black = mean(cm1ethrace == "Black", na.rm = T),
            Hispanic = mean(cm1ethrace == "Hispanic", na.rm = T),
            Other = mean(cm1ethrace == "Other", na.rm = T),
            missing.Race = mean(is.na(cm1ethrace)),
            LessThanHS = mean(cm1edu == "Less than HS", na.rm = T),
            HighSchool = mean(cm1edu == "High school", na.rm = T),
            SomeCollege = mean(cm1edu == "Some college", na.rm = T),
            College = mean(cm1edu == "College", na.rm = T),
            missing.Educ = mean(is.na(cm1edu))) %>%
  melt(id = NULL, value.name = "VitalRecords_All")

## Summarize births to residents of cities with populations > 250,000

vital_records_cities <- d1 %>%
  bind_rows(d2) %>%
  bind_rows(d3) %>%
  filter(citrspop %in% c("0","1","2")) %>%
  mutate(married = case_when(dmar == 1 ~ 1,
                             dmar == 2 ~ 0),
         cm1edu = factor(case_when(meduc6 %in% (1:2) ~ 1,
                                   meduc6 == 3 ~ 2,
                                   meduc6 == 4 ~ 3,
                                   meduc6 == 5 ~ 4),
                         labels = c("Less than HS",
                                    "High school",
                                    "Some college",
                                    "College")),
         cm1ethrace = case_when(orracem == 7 ~ "Black",
                                orracem <= 5 ~ "Hispanic",
                                orracem != 9 ~ "Other"),
         cm1age = dmage) %>%
  summarize(Married = mean(married, na.rm = T),
            missing.Married = mean(is.na(married)),
            Age = mean(cm1age, na.rm = T),
            missing.Age = mean(is.na(cm1age)),
            Black = mean(cm1ethrace == "Black", na.rm = T),
            Hispanic = mean(cm1ethrace == "Hispanic", na.rm = T),
            Other = mean(cm1ethrace == "Other", na.rm = T),
            missing.Race = mean(is.na(cm1ethrace)),
            LessThanHS = mean(cm1edu == "Less than HS", na.rm = T),
            HighSchool = mean(cm1edu == "High school", na.rm = T),
            SomeCollege = mean(cm1edu == "Some college", na.rm = T),
            College = mean(cm1edu == "College", na.rm = T),
            missing.Educ = mean(is.na(cm1edu))) %>%
  melt(id = NULL, value.name = "VitalRecords_Cities")

write.csv(vital_records_cities %>%
            left_join(vital_records_all, by = "variable"),
          file = "Output/vital_records_withMissingPercent.csv")

write.csv(our_data %>%
            left_join(vital_records, by = "variable"),
          file = "Output/vital_records_comparison.csv")

